Notebook for exploring the sensorization slope coefficients before they go into the report.
# Fetch the following sources and signals from the API.
# source_names, signal_names, pretty_names and target_names are parallel
# vectors: element i of each one describes the same indicator.
# TODO: Add Google Symptoms "eventually"
source_names = c("doctor-visits", "fb-survey", "fb-survey", "hospital-admissions")
signal_names = c("smoothed_adj_cli", "smoothed_cli", "smoothed_hh_cmnty_cli",
                 "smoothed_adj_covid19")
pretty_names = c("Doctor visits", "Facebook CLI", "Facebook CLI-in-community",
                 "Hospitalizations")
target_names = c("Cases", "Cases", "Cases", "Deaths")
geo_level = "county"
start_day = "2020-04-15"
# No explicit end date is passed to covidcast_signal().
end_day = NULL
cache_fname = 'cached_data/03_heterogeneity_core_indicators.RDS'
if (!file.exists(cache_fname)) {
  # Cache miss: download every indicator, one data frame per signal.
  df_signals = vector("list", length(signal_names))
  for (i in seq_along(signal_names)) {
    df_signals[[i]] = suppressWarnings(
      covidcast_signal(source_names[i], signal_names[i],
                       start_day, end_day,
                       geo_type=geo_level))
  }
  # Fetch USAFacts confirmed case incidence proportion (smoothed with 7-day
  # trailing average)
  df_cases = suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type=geo_level))
  # Same for the USAFacts death incidence proportion.
  df_deaths = suppressWarnings(
    covidcast_signal("usa-facts", "deaths_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type=geo_level))
  # Locations whose cumulative case count on the most recent day present in
  # df_cases is at least case_num.
  case_num = 500
  latest_day = max(df_cases$time_value)
  geo_values = suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_cumulative_num",
                     latest_day, latest_day,
                     geo_type=geo_level)) %>%
    filter(value >= case_num) %>% pull(geo_value)
  # geo_values is stored as a fourth element so that a cache hit defines the
  # same variables as a cache miss. The first three positions are unchanged,
  # so code that reads only elements 1-3 keeps working.
  saveRDS(list(df_signals, df_cases, df_deaths, geo_values), cache_fname)
} else {
  cached_data = readRDS(cache_fname)
  df_signals = cached_data[[1]]
  df_cases = cached_data[[2]]
  df_deaths = cached_data[[3]]
  # Caches written before geo_values was stored hold only three elements; in
  # that case geo_values is left undefined, exactly as before.
  if (length(cached_data) >= 4) {
    geo_values = cached_data[[4]]
  }
}
# Time windows reported in the plot titles as "fitted on t in a:b".
# NOTE(review): presumably day offsets relative to the fitting date, and
# element k of each results file is assumed to correspond to window k -- confirm
# against the script that writes results/03_sensorize_vals_*.RDS.
sensorize_time_ranges = list(
  c(-7, -1),
  c(-10, -1),
  c(-14, -1),
  c(-21, -1))
# Lower/upper quantiles of the slopes used as the visible y-range of each plot.
QUANTS = c(0.01, 0.99)
# TODO: Add more "core indicators"
for (ind_idx in seq_along(source_names)) {
  # Pick the target data frame for this indicator. df_target is not used
  # further in this chunk; the branch also rejects unknown target names.
  if (target_names[ind_idx] == 'Cases') {
    df_target = df_cases
  } else if (target_names[ind_idx] == 'Deaths') {
    df_target = df_deaths
  } else {
    stop(sprintf("No matching dataframe for target %s.", target_names[ind_idx]))
  }
  # Precomputed results for this indicator, keyed by source and signal name.
  base_cor_fname = sprintf('results/03_base_cors_%s_%s.RDS',
                           source_names[ind_idx], signal_names[ind_idx])
  sensorize_fname = sprintf('results/03_sensorize_cors_%s_%s.RDS',
                            source_names[ind_idx], signal_names[ind_idx])
  sensorize_val_fname = sprintf('results/03_sensorize_vals_%s_%s.RDS',
                                source_names[ind_idx], signal_names[ind_idx])
  df_cor_base = readRDS(base_cor_fname)
  sensorize_cors = readRDS(sensorize_fname)
  sensorized_vals = readRDS(sensorize_val_fname)
  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv = sensorized_vals[[inner_idx]]
    print(summary(sv$slope))
    # `<-` is required here: `=` inside print() would be parsed as a named
    # argument instead of an assignment.
    print(slope_limits <- quantile(sv$slope, QUANTS, na.rm=TRUE))
    plt = ggplot(sv, aes(x=time_value, y=slope)) +
      geom_point(alpha=0.1, size=0.5) +
      geom_hline(yintercept=0, colour='white') +
      # One line across all locations per date: the median slope ...
      stat_summary(
        aes(y=slope, group=1, colour='median'),
        fun=median,
        geom="line") +
      # ... and the median plus / minus one MAD (both share a legend entry).
      stat_summary(
        aes(y=slope, group=1, colour='+/- mad'),
        fun=function(x) { median(x) + mad(x) },
        geom="line") +
      stat_summary(
        aes(y=slope, group=1, colour='+/- mad'),
        fun=function(x) { median(x) - mad(x) },
        geom="line") +
      scale_colour_manual(
        values=c("median"="maroon",
                 "+/- mad"="darkgreen")) +
      labs(colour='') +
      ggtitle(
        sprintf("Slope distribution for %s, fitted on t in %d:%d",
                pretty_names[ind_idx],
                sensorize_time_ranges[[inner_idx]][1],
                sensorize_time_ranges[[inner_idx]][2])) +
      # Zoom to the central quantile range without discarding data. ylim()
      # would replace out-of-range slopes with NA before stat_summary() runs,
      # so the median / MAD lines would be computed on truncated data.
      coord_cartesian(ylim=c(slope_limits[[1]], slope_limits[[2]]))
    print(plt)
  }
}
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3488425 -1 0 3085 2 574876473 11718
## 1% 99%
## -26.70333 27.73314
## Warning: Removed 19566 rows containing non-finite values (stat_summary).
## Warning: Removed 19566 rows containing non-finite values (stat_summary).
## Warning: Removed 19566 rows containing non-finite values (stat_summary).
## Warning: Removed 19566 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3693626 -1 0 3525 2 582794882 9406
## 1% 99%
## -21.19657 23.05324
## Warning: Removed 17198 rows containing non-finite values (stat_summary).
## Warning: Removed 17198 rows containing non-finite values (stat_summary).
## Warning: Removed 17198 rows containing non-finite values (stat_summary).
## Warning: Removed 17198 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3693626 -1 0 844 2 52206253 7469
## 1% 99%
## -16.57598 19.69155
## Warning: Removed 15161 rows containing non-finite values (stat_summary).
## Warning: Removed 15161 rows containing non-finite values (stat_summary).
## Warning: Removed 15161 rows containing non-finite values (stat_summary).
## Warning: Removed 15161 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3693626 -1 0 1094 2 53331161 5355
## 1% 99%
## -12.64440 16.78322
## Warning: Removed 12829 rows containing non-finite values (stat_summary).
## Warning: Removed 12829 rows containing non-finite values (stat_summary).
## Warning: Removed 12829 rows containing non-finite values (stat_summary).
## Warning: Removed 12829 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -155949.92 -2.33 0.03 -0.61 2.72 62730.15
## NA's
## 6648
## 1% 99%
## -48.45862 50.61435
## Warning: Removed 9554 rows containing non-finite values (stat_summary).
## Warning: Removed 9554 rows containing non-finite values (stat_summary).
## Warning: Removed 9554 rows containing non-finite values (stat_summary).
## Warning: Removed 9554 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -155949.92 -2.23 0.05 0.08 2.72 61821.25
## NA's
## 4526
## 1% 99%
## -37.70329 41.63569
## Warning: Removed 7438 rows containing non-finite values (stat_summary).
## Warning: Removed 7438 rows containing non-finite values (stat_summary).
## Warning: Removed 7438 rows containing non-finite values (stat_summary).
## Warning: Removed 7438 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -155949.92 -2.09 0.10 0.32 2.80 61821.25
## NA's
## 2852
## 1% 99%
## -31.65871 38.37726
## Warning: Removed 5734 rows containing non-finite values (stat_summary).
## Warning: Removed 5734 rows containing non-finite values (stat_summary).
## Warning: Removed 5734 rows containing non-finite values (stat_summary).
## Warning: Removed 5734 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -155949.92 -1.80 0.19 0.22 2.95 21406.72
## NA's
## 1357
## 1% 99%
## -25.26378 36.80205
## Warning: Removed 4137 rows containing non-finite values (stat_summary).
## Warning: Removed 4137 rows containing non-finite values (stat_summary).
## Warning: Removed 4137 rows containing non-finite values (stat_summary).
## Warning: Removed 4137 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -300.3856 -0.2111 0.0461 0.1520 0.4261 255.1363 1504
## 1% 99%
## -3.450832 4.676999
## Warning: Removed 4510 rows containing non-finite values (stat_summary).
## Warning: Removed 4510 rows containing non-finite values (stat_summary).
## Warning: Removed 4510 rows containing non-finite values (stat_summary).
## Warning: Removed 4510 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -148.2433 -0.1868 0.0644 0.1960 0.4649 255.1363 1065
## 1% 99%
## -3.041596 4.581914
## Warning: Removed 4043 rows containing non-finite values (stat_summary).
## Warning: Removed 4043 rows containing non-finite values (stat_summary).
## Warning: Removed 4043 rows containing non-finite values (stat_summary).
## Warning: Removed 4043 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -148.2433 -0.1475 0.0917 0.2493 0.5221 255.1363 683
## 1% 99%
## -2.656265 4.402796
## Warning: Removed 3609 rows containing non-finite values (stat_summary).
## Warning: Removed 3609 rows containing non-finite values (stat_summary).
## Warning: Removed 3609 rows containing non-finite values (stat_summary).
## Warning: Removed 3609 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -148.24331 -0.08659 0.14417 0.34217 0.62492 108.68948
## NA's
## 309
## 1% 99%
## -2.136397 4.141479
## Warning: Removed 3111 rows containing non-finite values (stat_summary).
## Warning: Removed 3111 rows containing non-finite values (stat_summary).
## Warning: Removed 3111 rows containing non-finite values (stat_summary).
## Warning: Removed 3111 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -111.8991 -0.0313 0.0000 0.0042 0.0391 201.5931 1298
## 1% 99%
## -1.147600 1.217261
## Warning: Removed 3192 rows containing non-finite values (stat_summary).
## Warning: Removed 3192 rows containing non-finite values (stat_summary).
## Warning: Removed 3192 rows containing non-finite values (stat_summary).
## Warning: Removed 3192 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -111.8991 -0.0276 0.0000 0.0012 0.0355 171.3647 1116
## 1% 99%
## -0.8680358 0.8966277
## Warning: Removed 2988 rows containing non-finite values (stat_summary).
## Warning: Removed 2988 rows containing non-finite values (stat_summary).
## Warning: Removed 2988 rows containing non-finite values (stat_summary).
## Warning: Removed 2988 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -111.8991 -0.0228 0.0000 -0.0009 0.0318 58.1832 884
## 1% 99%
## -0.6537521 0.6697761
## Warning: Removed 2720 rows containing non-finite values (stat_summary).
## Warning: Removed 2720 rows containing non-finite values (stat_summary).
## Warning: Removed 2720 rows containing non-finite values (stat_summary).
## Warning: Removed 2720 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -111.8991 -0.0169 0.0000 0.0005 0.0283 50.4421 628
## 1% 99%
## -0.4700265 0.4854451
## Warning: Removed 2390 rows containing non-finite values (stat_summary).
## Warning: Removed 2390 rows containing non-finite values (stat_summary).
## Warning: Removed 2390 rows containing non-finite values (stat_summary).
## Warning: Removed 2390 rows containing missing values (geom_point).
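Why is the distribution of slope coefficients centered on zero? (Shouldn’t we expect the center of the distribution to be positive?) It is possible, though unlikely, that the distribution is no longer centered on zero once we condition on location. We can take a look anyway: for each location, check whether the interval median ± MAD of its slopes contains zero, and report the fraction of locations for which it does.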
# Time windows the slopes were fitted on, reported in each printed line.
# NOTE(review): element k of each results file is assumed to correspond to
# window k -- confirm against the script that writes the results files.
sensorize_time_ranges = list(
  c(-7, -1),
  c(-10, -1),
  c(-14, -1),
  c(-21, -1))
# Not used in this chunk; kept so the chunk defines the same variables as before.
QUANTS = c(0.01, 0.99)
# TODO: Add more "core indicators"
for (ind_idx in seq_along(source_names)) {
  # Pick the target data frame for this indicator. df_target is not used
  # further in this chunk; the branch also rejects unknown target names.
  if (target_names[ind_idx] == 'Cases') {
    df_target = df_cases
  } else if (target_names[ind_idx] == 'Deaths') {
    df_target = df_deaths
  } else {
    stop(sprintf("No matching dataframe for target %s.", target_names[ind_idx]))
  }
  # Precomputed results for this indicator, keyed by source and signal name.
  base_cor_fname = sprintf('results/03_base_cors_%s_%s.RDS',
                           source_names[ind_idx], signal_names[ind_idx])
  sensorize_fname = sprintf('results/03_sensorize_cors_%s_%s.RDS',
                            source_names[ind_idx], signal_names[ind_idx])
  sensorize_val_fname = sprintf('results/03_sensorize_vals_%s_%s.RDS',
                                source_names[ind_idx], signal_names[ind_idx])
  df_cor_base = readRDS(base_cor_fname)
  sensorize_cors = readRDS(sensorize_fname)
  sensorized_vals = readRDS(sensorize_val_fname)
  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv = sensorized_vals[[inner_idx]]
    # Per-location median and MAD of the slope. na.rm=TRUE matters: without
    # it a single missing slope makes the whole location's statistics NA, and
    # that location then silently drops out of the coverage below.
    sv_medmad = sv %>%
      group_by(geo_value) %>%
      summarize(
        med = median(slope, na.rm=TRUE),
        mad = mad(slope, na.rm=TRUE),
        med_upper = med + mad,
        med_lower = med - mad,
        .groups = "drop")
    # Fraction of locations whose (median - MAD, median + MAD) interval
    # strictly contains zero. Locations with no usable slope yield NA and are
    # excluded from the mean.
    contains_zero = (0 < sv_medmad$med_upper) & (0 > sv_medmad$med_lower)
    coverage = mean(contains_zero, na.rm=TRUE)
    # Include the time window so the lines for one indicator can be told apart.
    print(sprintf('Coverage for %s (fitted on t in %d:%d): %f',
                  pretty_names[ind_idx],
                  sensorize_time_ranges[[inner_idx]][1],
                  sensorize_time_ranges[[inner_idx]][2],
                  coverage))
  }
}
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Doctor visits: 0.999278"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Doctor visits: 1.000000"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Doctor visits: 0.998764"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Doctor visits: 0.984410"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI: 1.000000"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI: 1.000000"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI: 0.995320"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI: 0.963975"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI-in-community: 1.000000"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI-in-community: 0.996800"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI-in-community: 0.993404"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Facebook CLI-in-community: 0.925926"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Hospitalizations: 0.936709"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Hospitalizations: 0.926941"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Hospitalizations: 0.910204"
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] "Coverage for Hospitalizations: 0.870216"